Geospatial visualization

MACS 40700 University of Chicago

Geospatial visualization

  • History
  • Google Maps
  • Major components
    • Scale
    • Projection
    • Symbols

Map boundaries

  • Layer process
  • Fill in with:
    • Points
    • Symbols
    • Fills (choropleth)

Storing map boundaries

  • Geographic information system (GIS)
  • Specialized software
  • GIS in R

maps

  • Basic boundary files
  • The world
  • United States
    • Continental USA
    • States
    • Counties
  • France
  • Italy
  • New Zealand

map_data()

library(maps)

map_data("state") %>%
  as_tibble()
## # A tibble: 15,537 x 6
##     long   lat group order region  subregion
##  * <dbl> <dbl> <dbl> <int> <chr>   <chr>    
##  1 -87.5  30.4    1.     1 alabama <NA>     
##  2 -87.5  30.4    1.     2 alabama <NA>     
##  3 -87.5  30.4    1.     3 alabama <NA>     
##  4 -87.5  30.3    1.     4 alabama <NA>     
##  5 -87.6  30.3    1.     5 alabama <NA>     
##  6 -87.6  30.3    1.     6 alabama <NA>     
##  7 -87.6  30.3    1.     7 alabama <NA>     
##  8 -87.6  30.3    1.     8 alabama <NA>     
##  9 -87.7  30.3    1.     9 alabama <NA>     
## 10 -87.8  30.3    1.    10 alabama <NA>     
## # ... with 15,527 more rows

map_data()

library(gapminder)

# no group aesthetic
ggplot(gapminder, aes(year, lifeExp)) +
  geom_line()

# with grouping by country
ggplot(gapminder, aes(year, lifeExp, group = country)) +
  geom_line()

map_data()

map("state", "michigan")

Shapefiles

  • .shp stores the geographic coordinates of the geographic features
  • .dbf stores data associated with the geographic features
  • .prj stores information about the projection of the coordinates in the shapefile

Shapefiles

library(rgdal)

usa <- readOGR("data/census_bureau/cb_2013_us_state_20m/cb_2013_us_state_20m.shp")
## OGR data source with driver: ESRI Shapefile 
## Source: "/Users/soltoffbc/Projects/Data Visualization/dataviz/notes/data/census_bureau/cb_2013_us_state_20m/cb_2013_us_state_20m.shp", layer: "cb_2013_us_state_20m"
## with 52 features
## It has 9 fields
## Integer64 fields read as strings:  ALAND AWATER
str(usa, max.level = 2)
## Formal class 'SpatialPolygonsDataFrame' [package "sp"] with 5 slots
##   ..@ data       :'data.frame':  52 obs. of  9 variables:
##   ..@ polygons   :List of 52
##   ..@ plotOrder  : int [1:52] 32 48 29 3 18 46 33 22 34 51 ...
##   ..@ bbox       : num [1:2, 1:2] -179.1 17.9 179.8 71.4
##   .. ..- attr(*, "dimnames")=List of 2
##   ..@ proj4string:Formal class 'CRS' [package "sp"] with 1 slot

Shapefiles

usa %>%
  fortify() %>%
  head()
##    long  lat order  hole piece id group
## 1 -88.5 31.9     1 FALSE     1  0   0.1
## 2 -88.5 31.9     2 FALSE     1  0   0.1
## 3 -88.5 31.9     3 FALSE     1  0   0.1
## 4 -88.5 31.9     4 FALSE     1  0   0.1
## 5 -88.5 32.0     5 FALSE     1  0   0.1
## 6 -88.5 32.0     6 FALSE     1  0   0.1

Shapefiles

usa@data %>%
  as_tibble
## # A tibble: 52 x 9
##    STATEFP STATENS  AFFGEOID    GEOID STUSPS NAME    LSAD  ALAND   AWATER 
##  * <fct>   <fct>    <fct>       <fct> <fct>  <fct>   <fct> <fct>   <fct>  
##  1 01      01779775 0400000US01 01    AL     Alabama 00    131172… 459492…
##  2 05      00068085 0400000US05 05    AR     Arkans… 00    134772… 295881…
##  3 06      01779778 0400000US06 06    CA     Califo… 00    403482… 204843…
##  4 09      01779780 0400000US09 09    CT     Connec… 00    125419… 181540…
##  5 12      00294478 0400000US12 12    FL     Florida 00    138897… 314136…
##  6 13      01705317 0400000US13 13    GA     Georgia 00    148962… 494780…
##  7 16      01779783 0400000US16 16    ID     Idaho   00    214045… 239773…
##  8 17      01779784 0400000US17 17    IL     Illino… 00    143793… 620168…
##  9 18      00448508 0400000US18 18    IN     Indiana 00    927895… 153667…
## 10 20      00481813 0400000US20 20    KS     Kansas  00    211752… 134671…
## # ... with 42 more rows

fortify()

# state name
usa %>%
  fortify(region = "NAME") %>%
  head
##    long  lat order  hole piece      id     group
## 1 -88.5 31.9     1 FALSE     1 Alabama Alabama.1
## 2 -88.5 31.9     2 FALSE     1 Alabama Alabama.1
## 3 -88.5 31.9     3 FALSE     1 Alabama Alabama.1
## 4 -88.5 31.9     4 FALSE     1 Alabama Alabama.1
## 5 -88.5 32.0     5 FALSE     1 Alabama Alabama.1
## 6 -88.5 32.0     6 FALSE     1 Alabama Alabama.1
# FIPS code
usa %>%
  fortify(region = "STATEFP") %>%
  head
##    long  lat order  hole piece id group
## 1 -88.5 31.9     1 FALSE     1 01  01.1
## 2 -88.5 31.9     2 FALSE     1 01  01.1
## 3 -88.5 31.9     3 FALSE     1 01  01.1
## 4 -88.5 31.9     4 FALSE     1 01  01.1
## 5 -88.5 32.0     5 FALSE     1 01  01.1
## 6 -88.5 32.0     6 FALSE     1 01  01.1
# keep it all
(usa2 <- usa %>%
  fortify(region = "NAME") %>%
  as_tibble %>%
  left_join(usa@data, by = c("id" = "NAME")))
## # A tibble: 46,174 x 15
##     long   lat order hole  piece id     group   STATEFP STATENS  AFFGEOID 
##    <dbl> <dbl> <int> <lgl> <fct> <chr>  <fct>   <fct>   <fct>    <fct>    
##  1 -88.5  31.9     1 FALSE 1     Alaba… Alabam… 01      01779775 0400000U…
##  2 -88.5  31.9     2 FALSE 1     Alaba… Alabam… 01      01779775 0400000U…
##  3 -88.5  31.9     3 FALSE 1     Alaba… Alabam… 01      01779775 0400000U…
##  4 -88.5  31.9     4 FALSE 1     Alaba… Alabam… 01      01779775 0400000U…
##  5 -88.5  32.0     5 FALSE 1     Alaba… Alabam… 01      01779775 0400000U…
##  6 -88.5  32.0     6 FALSE 1     Alaba… Alabam… 01      01779775 0400000U…
##  7 -88.5  32.1     7 FALSE 1     Alaba… Alabam… 01      01779775 0400000U…
##  8 -88.4  32.2     8 FALSE 1     Alaba… Alabam… 01      01779775 0400000U…
##  9 -88.4  32.2     9 FALSE 1     Alaba… Alabam… 01      01779775 0400000U…
## 10 -88.4  32.2    10 FALSE 1     Alaba… Alabam… 01      01779775 0400000U…
## # ... with 46,164 more rows, and 5 more variables: GEOID <fct>,
## #   STUSPS <fct>, LSAD <fct>, ALAND <fct>, AWATER <fct>

Simple features

  • Standard of how to represent real-world objects on a computer
  • Emphasizes spatial geometry
  • Widely implemented in commercial software
  • sf package

What is a feature?

  • A thing or an object in the real world
  • Frequently consist of other objects
  • Features have a geometry describing where on Earth the feature is located
  • They have attributes, which describe other properties of the feature

Dimensions

  • Geometries composed of points
  • Coordinates in 2-, 3- or 4-dimensional space
  • X and Y coordinates
  • Z coordinate
  • M coordinate

Simple feature geometry types

type description
POINT zero-dimensional geometry containing a single point
LINESTRING sequence of points connected by straight, non-self intersecting line pieces; one-dimensional geometry
POLYGON geometry with a positive area (two-dimensional); sequence of points form a closed, non-self intersecting ring; the first ring denotes the exterior ring, zero or more subsequent rings denote holes in this exterior ring
MULTIPOINT set of points; a MULTIPOINT is simple if no two Points in the MULTIPOINT are equal
MULTILINESTRING set of linestrings
MULTIPOLYGON set of polygons
GEOMETRYCOLLECTION set of geometries of any type except GEOMETRYCOLLECTION

Simple features in R

  • sf stores simple features as basic R data structures (lists, matrix, vectors, etc.)
  • Data frame storage
  • One row per feature
  • Geometries and list columns
  • The three classes used to represent simple features
    • sf
    • sfc
    • sfg

Example: North Carolina

library(sf)
nc <- st_read(system.file("shape/nc.shp", package="sf"))
## Reading layer `nc' from data source `/Library/Frameworks/R.framework/Versions/3.4/Resources/library/sf/shape/nc.shp' using driver `ESRI Shapefile'
## Simple feature collection with 100 features and 14 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: -84.3 ymin: 33.9 xmax: -75.5 ymax: 36.6
## epsg (SRID):    4267
## proj4string:    +proj=longlat +datum=NAD27 +no_defs

Example: North Carolina

class(nc)
## [1] "sf"         "data.frame"

Example: North Carolina

attr(nc, "sf_column")
## [1] "geometry"

Example: North Carolina

print(nc[9:15], n = 3)
## Simple feature collection with 100 features and 6 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: -84.3 ymin: 33.9 xmax: -75.5 ymax: 36.6
## epsg (SRID):    4267
## proj4string:    +proj=longlat +datum=NAD27 +no_defs
## First 3 features:
##   BIR74 SID74 NWBIR74 BIR79 SID79 NWBIR79                       geometry
## 1  1091     1      10  1364     0      19 MULTIPOLYGON (((-81.5 36.2,...
## 2   487     0      10   542     3      12 MULTIPOLYGON (((-81.2 36.4,...
## 3  3188     5     208  3616     6     260 MULTIPOLYGON (((-80.5 36.2,...

Plotting boundaries with maps

# map of the world
map()

# usa boundaries
map("usa")

map("state")

# county map of illinois
map("county", "illinois")

Plotting boundaries with ggplot2

usa <- map_data("usa") %>%
  as_tibble
usa
## # A tibble: 7,243 x 6
##     long   lat group order region subregion
##  * <dbl> <dbl> <dbl> <int> <chr>  <chr>    
##  1 -101.  29.7    1.     1 main   <NA>     
##  2 -101.  29.7    1.     2 main   <NA>     
##  3 -101.  29.7    1.     3 main   <NA>     
##  4 -101.  29.6    1.     4 main   <NA>     
##  5 -101.  29.6    1.     5 main   <NA>     
##  6 -101.  29.6    1.     6 main   <NA>     
##  7 -101.  29.6    1.     7 main   <NA>     
##  8 -101.  29.6    1.     8 main   <NA>     
##  9 -101.  29.6    1.     9 main   <NA>     
## 10 -101.  29.6    1.    10 main   <NA>     
## # ... with 7,233 more rows

Simple black map

ggplot() +
  geom_polygon(data = usa, aes(x = long, y = lat, group = group))

Simple black map

ggplot() +
  geom_polygon(data = usa, aes(x = long, y = lat, group = group)) +
  coord_fixed()

Simple black map

ggplot() +
  geom_polygon(data = usa, aes(x = long, y = lat, group = group)) +
  coord_fixed(1.3)

Change the colors

ggplot() +
  geom_polygon(data = usa, aes(x = long, y = lat, group = group),
               fill = NA, color = "red") + 
  coord_fixed(1.3)

gg1 <- ggplot() + 
  geom_polygon(data = usa, aes(x = long, y = lat, group = group),
               fill = "violet", color = "blue") + 
  coord_fixed(1.3)
gg1

Always remember to use the group aesthetic

ggplot() + 
  geom_polygon(data = usa, aes(x = long, y = lat),
               fill = "violet", color = "blue") + 
  coord_fixed(1.3)

State maps

states <- map_data("state") %>%
  as_tibble()
states
## # A tibble: 15,537 x 6
##     long   lat group order region  subregion
##  * <dbl> <dbl> <dbl> <int> <chr>   <chr>    
##  1 -87.5  30.4    1.     1 alabama <NA>     
##  2 -87.5  30.4    1.     2 alabama <NA>     
##  3 -87.5  30.4    1.     3 alabama <NA>     
##  4 -87.5  30.3    1.     4 alabama <NA>     
##  5 -87.6  30.3    1.     5 alabama <NA>     
##  6 -87.6  30.3    1.     6 alabama <NA>     
##  7 -87.6  30.3    1.     7 alabama <NA>     
##  8 -87.6  30.3    1.     8 alabama <NA>     
##  9 -87.7  30.3    1.     9 alabama <NA>     
## 10 -87.8  30.3    1.    10 alabama <NA>     
## # ... with 15,527 more rows

State maps

ggplot(data = states) + 
  geom_polygon(aes(x = long, y = lat, group = group), color = "white") + 
  coord_fixed(1.3)

State maps

ggplot(data = states) + 
  geom_polygon(aes(x = long, y = lat, fill = region, group = group), color = "white") + 
  coord_fixed(1.3) +
  # turn off color legend
  theme(legend.position = "none")

Plot a subset of states

midwest <- subset(states, region %in% c("illinois", "indiana", "iowa",
                                        "kansas", "michigan", "minnesota",
                                        "missouri", "nebraska", "north dakota",
                                        "ohio", "south dakota", "wisconsin"))

ggplot(data = midwest) + 
  geom_polygon(aes(x = long, y = lat, group = group),
               fill = "palegreen", color = "black") + 
  coord_fixed(1.3)

But what about Alaska and Hawaii?

fiftystater

library(fiftystater)

data("fifty_states")
fifty_states %>%
  as_tibble
## # A tibble: 13,694 x 7
##     long   lat order hole  piece id      group    
##    <dbl> <dbl> <int> <lgl> <fct> <chr>   <fct>    
##  1 -85.1  32.0     1 FALSE 1     alabama Alabama.1
##  2 -85.1  31.9     2 FALSE 1     alabama Alabama.1
##  3 -85.1  31.9     3 FALSE 1     alabama Alabama.1
##  4 -85.1  31.8     4 FALSE 1     alabama Alabama.1
##  5 -85.1  31.8     5 FALSE 1     alabama Alabama.1
##  6 -85.1  31.7     6 FALSE 1     alabama Alabama.1
##  7 -85.1  31.7     7 FALSE 1     alabama Alabama.1
##  8 -85.1  31.7     8 FALSE 1     alabama Alabama.1
##  9 -85.1  31.6     9 FALSE 1     alabama Alabama.1
## 10 -85.0  31.6    10 FALSE 1     alabama Alabama.1
## # ... with 13,684 more rows
ggplot(data = fifty_states, mapping = aes(x = long, y = lat, group = group)) + 
  coord_fixed(1.3) + 
  geom_polygon(color = "black", fill = "gray")

From a shapefile

glimpse(usa2)
## Observations: 46,174
## Variables: 15
## $ long     <dbl> -88.5, -88.5, -88.5, -88.5, -88.5, -88.5, -88.5, -88....
## $ lat      <dbl> 31.9, 31.9, 31.9, 31.9, 32.0, 32.0, 32.1, 32.2, 32.2,...
## $ order    <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16...
## $ hole     <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALS...
## $ piece    <fct> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...
## $ id       <chr> "Alabama", "Alabama", "Alabama", "Alabama", "Alabama"...
## $ group    <fct> Alabama.1, Alabama.1, Alabama.1, Alabama.1, Alabama.1...
## $ STATEFP  <fct> 01, 01, 01, 01, 01, 01, 01, 01, 01, 01, 01, 01, 01, 0...
## $ STATENS  <fct> 01779775, 01779775, 01779775, 01779775, 01779775, 017...
## $ AFFGEOID <fct> 0400000US01, 0400000US01, 0400000US01, 0400000US01, 0...
## $ GEOID    <fct> 01, 01, 01, 01, 01, 01, 01, 01, 01, 01, 01, 01, 01, 0...
## $ STUSPS   <fct> AL, AL, AL, AL, AL, AL, AL, AL, AL, AL, AL, AL, AL, A...
## $ LSAD     <fct> 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 0...
## $ ALAND    <fct> 131172434095, 131172434095, 131172434095, 13117243409...
## $ AWATER   <fct> 4594920201, 4594920201, 4594920201, 4594920201, 45949...

From a shapefile

ggplot(data = usa2, mapping = aes(x = long, y = lat, group = group)) + 
  coord_fixed(1.3) + 
  geom_polygon(color = "black", fill = "gray")

From a shapefile

usa2 <- usa2 %>%
  filter(id != "Alaska", id != "Hawaii", id != "Puerto Rico")

ggplot(data = usa2, mapping = aes(x = long, y = lat, group = group)) + 
  coord_fixed(1.3) + 
  geom_polygon(color = "black", fill = "gray")

An sf object

glimpse(nc)
## Observations: 100
## Variables: 15
## $ AREA      <dbl> 0.114, 0.061, 0.143, 0.070, 0.153, 0.097, 0.062, 0.0...
## $ PERIMETER <dbl> 1.44, 1.23, 1.63, 2.97, 2.21, 1.67, 1.55, 1.28, 1.42...
## $ CNTY_     <dbl> 1825, 1827, 1828, 1831, 1832, 1833, 1834, 1835, 1836...
## $ CNTY_ID   <dbl> 1825, 1827, 1828, 1831, 1832, 1833, 1834, 1835, 1836...
## $ NAME      <fct> Ashe, Alleghany, Surry, Currituck, Northampton, Hert...
## $ FIPS      <fct> 37009, 37005, 37171, 37053, 37131, 37091, 37029, 370...
## $ FIPSNO    <dbl> 37009, 37005, 37171, 37053, 37131, 37091, 37029, 370...
## $ CRESS_ID  <int> 5, 3, 86, 27, 66, 46, 15, 37, 93, 85, 17, 79, 39, 73...
## $ BIR74     <dbl> 1091, 487, 3188, 508, 1421, 1452, 286, 420, 968, 161...
## $ SID74     <dbl> 1, 0, 5, 1, 9, 7, 0, 0, 4, 1, 2, 16, 4, 4, 4, 18, 3,...
## $ NWBIR74   <dbl> 10, 10, 208, 123, 1066, 954, 115, 254, 748, 160, 550...
## $ BIR79     <dbl> 1364, 542, 3616, 830, 1606, 1838, 350, 594, 1190, 20...
## $ SID79     <dbl> 0, 3, 6, 2, 3, 5, 2, 2, 2, 5, 2, 5, 4, 4, 6, 17, 4, ...
## $ NWBIR79   <dbl> 19, 12, 260, 145, 1197, 1237, 139, 371, 844, 176, 59...
## $ geometry  <MULTIPOLYGON [°]> MULTIPOLYGON (((-81.5 36.2,..., MULTIPO...

geom_sf()

ggplot() +
  geom_sf(data = nc)

ggmap

library(ggmap)
get_googlemap("Saieh Hall For Economics, East 58th Street, Chicago, IL", zoom = 10) %>%
  ggmap()

leaflet

Leaflet for R

Map projections

Changing map projections

ggplot(data = usa2, mapping = aes(x = long, y = lat, group = group)) + 
  geom_polygon(color = "black", fill = "gray") +
  coord_map() +
  ggtitle("Mercator projection (default)")

ggplot(data = usa2, mapping = aes(x = long, y = lat, group = group)) + 
  geom_polygon(color = "black", fill = "gray") +
  coord_map(projection = "albers", lat0 = 25, lat1 = 50) +
  ggtitle("Albers equal-area projection")

ggplot(data = map_data("world"), mapping = aes(x = long, y = lat, group = group)) + 
  geom_polygon(color = "black", fill = "gray") +
  coord_map(projection = "mollweide", xlim = c(-180, 180)) +
  ggtitle("Mollweide projection")

Points

library(nycflights13)
airports
## # A tibble: 1,458 x 8
##    faa   name                   lat    lon   alt    tz dst   tzone        
##    <chr> <chr>                <dbl>  <dbl> <int> <dbl> <chr> <chr>        
##  1 04G   Lansdowne Airport     41.1  -80.6  1044   -5. A     America/New_…
##  2 06A   Moton Field Municip…  32.5  -85.7   264   -6. A     America/Chic…
##  3 06C   Schaumburg Regional   42.0  -88.1   801   -6. A     America/Chic…
##  4 06N   Randall Airport       41.4  -74.4   523   -5. A     America/New_…
##  5 09J   Jekyll Island Airpo…  31.1  -81.4    11   -5. A     America/New_…
##  6 0A9   Elizabethton Munici…  36.4  -82.2  1593   -5. A     America/New_…
##  7 0G6   Williams County Air…  41.5  -84.5   730   -5. A     America/New_…
##  8 0G7   Finger Lakes Region…  42.9  -76.8   492   -5. A     America/New_…
##  9 0P2   Shoestring Aviation…  39.8  -76.6  1000   -5. U     America/New_…
## 10 0S9   Jefferson County In…  48.1 -123.    108   -8. A     America/Los_…
## # ... with 1,448 more rows

Points

ggplot(airports, aes(lon, lat)) +
  geom_point()

Points

ggplot() + 
  coord_map() + 
  geom_polygon(data = usa2, mapping = aes(x = long, y = lat, group = group),
               color = "black", fill = "gray") +
  geom_point(data = airports, aes(x = lon, y = lat), shape = 1)

Points

ggplot() + 
  coord_map(xlim = c(-130, -60),
            ylim = c(20, 50)) + 
  geom_polygon(data = usa2, mapping = aes(x = long, y = lat, group = group),
               color = "black", fill = "gray") +
  geom_point(data = airports, aes(x = lon, y = lat), shape = 1)

Points

ggplot() + 
  coord_map(projection = "albers", lat0 = 25, lat1 = 50,
            xlim = c(-130, -60),
            ylim = c(20, 50)) + 
  geom_polygon(data = usa2, mapping = aes(x = long, y = lat, group = group),
               color = "black", fill = "gray") +
  geom_point(data = airports, aes(x = lon, y = lat), shape = 1)

Symbols

ggplot() + 
  coord_map(xlim = c(-130, -60),
            ylim = c(20, 50)) + 
  geom_polygon(data = usa2, mapping = aes(x = long, y = lat, group = group),
               color = "black", fill = "white") +
  geom_point(data = airports, aes(x = lon, y = lat, size = alt),
             fill = "grey", color = "black", alpha = .2) +
  theme_void() +
  theme(legend.position = "none")

Symbols

airports_n <- flights %>%
  count(dest) %>%
  left_join(airports, by = c("dest" = "faa"))

ggplot() + 
  coord_map(xlim = c(-130, -60),
            ylim = c(20, 50)) + 
  geom_polygon(data = usa2, mapping = aes(x = long, y = lat, group = group),
               color = "black", fill = "white") +
  geom_point(data = airports_n, aes(x = lon, y = lat, size = n),
             fill = "grey", color = "black", alpha = .2) +
  theme_void() +
  theme(legend.position = "none")

Drawing choropleth maps

usa <- readOGR("data/census_bureau/cb_2013_us_state_20m/cb_2013_us_state_20m.shp")
## OGR data source with driver: ESRI Shapefile 
## Source: "/Users/soltoffbc/Projects/Data Visualization/dataviz/notes/data/census_bureau/cb_2013_us_state_20m/cb_2013_us_state_20m.shp", layer: "cb_2013_us_state_20m"
## with 52 features
## It has 9 fields
## Integer64 fields read as strings:  ALAND AWATER
usa2 <- usa %>%
  fortify(region = "GEOID") %>%
  as_tibble %>%
  left_join(usa@data, by = c("id" = "GEOID")) %>%
  # filter out Alaska, Hawaii, Puerto Rico via FIPS codes
  filter(!(STATEFP %in% c("02", "15", "72")))

counties <- readOGR("data/census_bureau/cb_2013_us_county_20m/cb_2013_us_county_20m.shp")
## OGR data source with driver: ESRI Shapefile 
## Source: "/Users/soltoffbc/Projects/Data Visualization/dataviz/notes/data/census_bureau/cb_2013_us_county_20m/cb_2013_us_county_20m.shp", layer: "cb_2013_us_county_20m"
## with 3221 features
## It has 9 fields
## Integer64 fields read as strings:  ALAND AWATER
counties2 <- counties %>%
  fortify(region = "GEOID") %>%
  as_tibble %>%
  left_join(counties@data, by = c("id" = "GEOID")) %>%
  # filter out Alaska, Hawaii, Puerto Rico via FIPS codes
  filter(!(STATEFP %in% c("02", "15", "72")))

ggplot(counties2, mapping = aes(x = long, y = lat, group = group)) + 
  geom_polygon(color = "black", fill = "gray") +
  coord_map()

Drawing choropleth maps

(fb_state <- read_csv("data/census_bureau/ACS_13_5YR_B05012_state/ACS_13_5YR_B05012.csv") %>%
  mutate(rate = HD01_VD03 / HD01_VD01))
## # A tibble: 51 x 10
##    GEO.id GEO.id2 `GEO.display-la… HD01_VD01 HD02_VD01 HD01_VD02 HD02_VD02
##    <chr>  <chr>   <chr>                <int> <chr>         <int>     <int>
##  1 04000… 01      Alabama            4799277 <NA>        4631045      2881
##  2 04000… 02      Alaska              720316 <NA>         669941      1262
##  3 04000… 04      Arizona            6479703 <NA>        5609835      7725
##  4 04000… 05      Arkansas           2933369 <NA>        2799972      2568
##  5 04000… 06      California        37659181 <NA>       27483342     30666
##  6 04000… 08      Colorado           5119329 <NA>        4623809      5778
##  7 04000… 09      Connecticut        3583561 <NA>        3096374      5553
##  8 04000… 10      Delaware            908446 <NA>         831683      2039
##  9 04000… 11      District of Col…    619371 <NA>         534142      2017
## 10 04000… 12      Florida           19091156 <NA>       15392410     16848
## # ... with 41 more rows, and 3 more variables: HD01_VD03 <int>,
## #   HD02_VD03 <int>, rate <dbl>
(fb_county <- read_csv("data/census_bureau/ACS_13_5YR_B05012_county/ACS_13_5YR_B05012.csv") %>%
  mutate(rate = HD01_VD03 / HD01_VD01))
## # A tibble: 3,143 x 10
##    GEO.id GEO.id2 `GEO.display-la… HD01_VD01 HD02_VD01 HD01_VD02 HD02_VD02
##    <chr>  <chr>   <chr>                <int>     <int>     <int>     <int>
##  1 05000… 01001   Autauga County,…     54907        NA     54019       277
##  2 05000… 01003   Baldwin County,…    187114        NA    180308       802
##  3 05000… 01005   Barbour County,…     27321        NA     26517       145
##  4 05000… 01007   Bibb County, Al…     22754        NA     22486        86
##  5 05000… 01009   Blount County, …     57623        NA     55122       262
##  6 05000… 01011   Bullock County,…     10746        NA     10170       484
##  7 05000… 01013   Butler County, …     20624        NA     20467        91
##  8 05000… 01015   Calhoun County,…    117714        NA    114892       392
##  9 05000… 01017   Chambers County…     34145        NA     33786       101
## 10 05000… 01019   Cherokee County…     26034        NA     25849       106
## # ... with 3,133 more rows, and 3 more variables: HD01_VD03 <int>,
## #   HD02_VD03 <int>, rate <dbl>

Joining the data to regions

ggplot(fb_state, aes(map_id = GEO.id2)) +
  geom_map(aes(fill = rate), map = usa2) +
  expand_limits(x = usa2$long, y = usa2$lat)

Joining the data to regions

ggplot(fb_state, aes(map_id = GEO.id2)) +
  geom_map(aes(fill = rate), map = usa2) +
  expand_limits(x = usa2$long, y = usa2$lat) +
  scale_fill_continuous(labels = scales::percent) +
  labs(title = "Rate of foreign-born individuals in the population",
       fill = NULL) +
  ggthemes::theme_map() +
  coord_map(projection = "albers", lat0 = 25, lat1 = 50)

Joining the data to regions

ggplot(fb_county, aes(map_id = GEO.id2)) +
  geom_map(aes(fill = rate), map = counties2) +
  expand_limits(x = counties2$long, y = counties2$lat) +
  scale_fill_continuous(labels = scales::percent) +
  labs(title = "Rate of foreign-born individuals in the population",
       fill = NULL) +
  ggthemes::theme_map() +
  coord_map(projection = "albers", lat0 = 25, lat1 = 50)

sf

ggplot() +
  geom_sf(data = nc, aes(fill = BIR74))

sf

(nc2 <- nc %>%
  select(SID74, SID79, geometry) %>%
  gather(VAR, SID, -geometry))
## Simple feature collection with 200 features and 2 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: -84.3 ymin: 33.9 xmax: -75.5 ymax: 36.6
## epsg (SRID):    4267
## proj4string:    +proj=longlat +datum=NAD27 +no_defs
## First 10 features:
##      VAR SID                       geometry
## 1  SID74   1 MULTIPOLYGON (((-81.5 36.2,...
## 2  SID74   0 MULTIPOLYGON (((-81.2 36.4,...
## 3  SID74   5 MULTIPOLYGON (((-80.5 36.2,...
## 4  SID74   1 MULTIPOLYGON (((-76 36.3, -...
## 5  SID74   9 MULTIPOLYGON (((-77.2 36.2,...
## 6  SID74   7 MULTIPOLYGON (((-76.7 36.2,...
## 7  SID74   0 MULTIPOLYGON (((-76 36.3, -...
## 8  SID74   0 MULTIPOLYGON (((-76.6 36.3,...
## 9  SID74   4 MULTIPOLYGON (((-78.3 36.3,...
## 10 SID74   1 MULTIPOLYGON (((-80 36.3, -...
ggplot() +
  geom_sf(data = nc2, aes(fill = SID)) +
  facet_wrap(~ VAR, ncol = 1)

Selecting color palettes

RColorBrewer

Color Brewer

Sequential

fb_county %>%
  mutate(rate_cut = cut_number(rate, 6)) %>%
  ggplot(aes(map_id = GEO.id2)) +
  geom_map(aes(fill = rate_cut), map = counties2) +
  expand_limits(x = counties2$long, y = counties2$lat) +
  labs(title = "Rate of foreign-born individuals in the population",
       fill = NULL) +
  ggthemes::theme_map() +
  coord_map(projection = "albers", lat0 = 25, lat1 = 50) +
  scale_fill_brewer(palette = "BuGn")

fb_county %>%
  mutate(rate_cut = cut_number(rate, 6)) %>%
  ggplot(aes(map_id = GEO.id2)) +
  geom_map(aes(fill = rate_cut), map = counties2) +
  expand_limits(x = counties2$long, y = counties2$lat) +
  labs(title = "Rate of foreign-born individuals in the population",
       fill = NULL) +
  ggthemes::theme_map() +
  coord_map(projection = "albers", lat0 = 25, lat1 = 50) +
  scale_fill_brewer(palette = "YlGn")

fb_county %>%
  mutate(rate_cut = cut_number(rate, 6)) %>%
  ggplot(aes(map_id = GEO.id2)) +
  geom_map(aes(fill = rate_cut), map = counties2) +
  expand_limits(x = counties2$long, y = counties2$lat) +
  labs(title = "Rate of foreign-born individuals in the population",
       fill = NULL) +
  ggthemes::theme_map() +
  coord_map(projection = "albers", lat0 = 25, lat1 = 50) +
  scale_fill_brewer(palette = "Blues")

Qualitative

state_data <- data_frame(name = state.name,
                         region = state.region,
                         subregion = state.division,
                         abb = state.abb) %>%
  bind_cols(as_tibble(state.x77)) %>%
  # get id variable into data frame
  left_join(usa2 %>%
              select(id, NAME) %>%
              distinct,
            by = c("name" = "NAME")) %>%
  # remove Alaska and Hawaii
  na.omit

# set region base plot
region_p <- ggplot(state_data, aes(map_id = id)) +
  geom_map(aes(fill = region), map = usa2) +
  expand_limits(x = usa2$long, y = usa2$lat) +
  labs(fill = NULL) +
  ggthemes::theme_map() +
  coord_map(projection = "albers", lat0 = 25, lat1 = 50)
region_p

# try different color brewers
region_p +
  scale_fill_brewer(palette = "Paired")

region_p +
  scale_fill_brewer(palette = "Dark2")

region_p +
  scale_fill_brewer(palette = "Pastel2")

# set subregion base plot
subregion_p <- ggplot(state_data, aes(map_id = id)) +
  geom_map(aes(fill = subregion), map = usa2) +
  expand_limits(x = usa2$long, y = usa2$lat) +
  labs(fill = NULL) +
  ggthemes::theme_map() +
  coord_map(projection = "albers", lat0 = 25, lat1 = 50)
subregion_p

subregion_p +
  scale_fill_brewer(palette = "Paired")

subregion_p +
  scale_fill_brewer(palette = "Set1")

subregion_p +
  scale_fill_brewer(palette = "Pastel1")